package crawler.proxy.impl;

import crawler.proxy.Proxy;
import crawler.proxy.ProxySpider;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

/**
 * Proxy spider that scrapes the proxy table served by nianshao.me.
 *
 * <p>Each table row after the first is read as one proxy entry: cell 0 is taken as the
 * IP, cell 1 as the port, and cell 3 as the anonymity label that is matched against the
 * inherited {@code filterStr}.
 *
 * <p>USER: lintc 【lintiancong@zhuojianchina.com】
 * <br>DATE: 2017-09-14 10:42
 */
public class NiansProxy extends ProxySpider {

    /**
     * Creates the spider with the nianshao.me listing URL as its URL template.
     */
    public NiansProxy() {
        // NOTE(review): the query string uses a second '?' before "page" where '&' would
        // normally separate parameters — confirm against the site before changing it.
        super("http://www.nianshao.me/?stype=2?page=index");
    }

    /**
     * Fetches the page at the inherited {@code catchInvokeUrl} and collects every row whose
     * anonymity cell contains the inherited {@code filterStr}.
     *
     * <p>Rows that lack any of the three required cells (for example header or spacer rows)
     * are skipped instead of failing the whole scrape. An {@link IOException} while fetching
     * is logged and results in an empty set.
     *
     * @param index page index requested by the caller; not read by this method itself
     *              (presumably already applied to {@code catchInvokeUrl} by the superclass —
     *              TODO confirm)
     * @return the proxies found on the page; never {@code null}, possibly empty
     */
    @Override
    public Set<Proxy> getList(int index) {
        Set<Proxy> list = new HashSet<>();
        try {
            Document document = Jsoup.connect(catchInvokeUrl).get();
            // "tr:gt(0)" keeps only rows whose sibling index is greater than 0,
            // i.e. everything after the first row of a table.
            for (Element row : document.select("tr:gt(0)")) {
                String ip = cellText(row, "td:eq(0)");
                String port = cellText(row, "td:eq(1)");
                String isAnonymous = cellText(row, "td:eq(3)");
                if (ip == null || port == null || isAnonymous == null) {
                    // Row does not have the expected cells; calling text() on the missing
                    // cell would throw a NullPointerException, so skip it.
                    continue;
                }
                if (isAnonymous.contains(filterStr)) {
                    Proxy proxy = new Proxy();
                    proxy.setIp(ip);
                    proxy.setPort(port);
                    list.add(proxy);
                }
            }
        } catch (IOException e) {
            LOG.error(catchInvokeUrl + ": 抓取失败 (" + e.getMessage() + ")");
        }
        return list;
    }

    /**
     * Returns the text of the first element under {@code row} matching {@code selector},
     * or {@code null} when nothing matches.
     */
    private static String cellText(Element row, String selector) {
        Element cell = row.select(selector).first();
        return cell == null ? null : cell.text();
    }

    /**
     * Manual smoke test: runs this spider for page 1 and prints every proxy it returns.
     */
    public static void main(String[] args) {
        // Instantiate this class; the original created a different spider here,
        // which looks like a copy-paste leftover.
        ProxySpider proxySpider = new NiansProxy();
        Set<Proxy> set = proxySpider.list(1);
        for (Proxy proxy : set) {
            System.out.println(proxy);
        }
    }
}
